library(stringr)
library(feather)
library(koRpus)
library(tidyverse)
library(lme4)
library(dplyr)
library(sjPlot)
library(corrplot)
library(tidytext)
library(tm)
# Compare by Plotting Growth Curve ----
# Read the two corpus tables. CHILDES is restricted to 14-58 months to
# align its age range with that of LDP.
childes_all <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/childes_all.feather"
) %>%
  filter(age >= 14, age <= 58)
ldp_all <- read_feather(
  "/Users/Yawen/Desktop/lexical diversity/trial5_ldp/ldp_all.feather"
)

# Smoothed curves of the four kid_* lexical diversity indices over age in
# CHILDES. Each index is passed through scale() so that the four curves
# can share a single y axis.
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Growth Curve of CHILDES Children",
    subtitle = "14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )

# compare with CDI
# Smoothed, scale()-standardised curves of the four kid_* indices next to
# the cdi column of LDP, restricted to the 14-30 month window by xlim().
# The data frame is passed ungrouped: a dplyr group_by() is not a ggplot2
# grouping aesthetic, so it never affected these smoothers.
# NOTE(review): xlim() starts at 14 months but the subtitle says 18 --
# confirm which lower bound is intended.
# NOTE(review): scale() is evaluated on every row of ldp_all, while xlim()
# removes out-of-window rows before smoothing -- confirm that
# standardising over the full age range is what is wanted here.
ldp_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(cdi), color = "CDI"), se = FALSE) +
  xlim(14, 30) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with CDI",
    subtitle = "LDP: 18 ~ 30 Months",
    y = "lexical diversity (scaled)"
  )

# compare with PPVT
# Smoothed, scale()-standardised curves of the four kid_* indices next to
# the ppvt column of LDP, restricted to the 30-53 month window by xlim().
# The data frame is passed ungrouped: a dplyr group_by() is not a ggplot2
# grouping aesthetic, so it never affected these smoothers.
# NOTE(review): scale() is evaluated on every row of ldp_all, while xlim()
# removes out-of-window rows before smoothing -- confirm that
# standardising over the full age range is what is wanted here.
ldp_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_vocd), color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mtld), color = "mtld"), se = FALSE) +
  geom_smooth(aes(y = scale(ppvt), color = "PPVT"), se = FALSE) +
  xlim(30, 53) +
  theme_classic() +
  labs(
    title = "Compare Lexical Diversity Indices with PPVT",
    subtitle = "LDP: 30 ~ 53 Months",
    y = "lexical diversity (scaled)"
  )

# TTR vs MATTR
# scale()-standardised kid_ttr and kid_mattr smoothed over age, drawn once
# for each corpus.

# CHILDES
ggplot(childes_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Growth Curve by TTR & MATTR",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )

# LDP
ggplot(ldp_all, aes(x = age)) +
  geom_smooth(aes(y = scale(kid_ttr), color = "ttr"), se = FALSE) +
  geom_smooth(aes(y = scale(kid_mattr), color = "mattr"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by TTR & MATTR",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (scaled)"
  )

# MTLD vs vocd-D (Kid)
# kid_vocd and kid_mtld smoothed over age, drawn once for each corpus.
# Unlike the plots that wrap each index in scale(), these curves use the
# raw column values, so the y-axis label says so instead of "(scaled)".

# CHILDES
childes_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

# LDP
ldp_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = kid_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = kid_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Children's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

# MTLD vs vocd-D (Mother)
# mom_vocd and mom_mtld smoothed over (child) age, drawn once for each
# corpus. The curves use the raw column values (no scale() call), so the
# y-axis label says so instead of "(scaled)".

# CHILDES
childes_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "CHILDES: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

# LDP
ldp_all %>%
  ggplot(aes(x = age)) +
  geom_smooth(aes(y = mom_vocd, color = "vocd"), se = FALSE) +
  geom_smooth(aes(y = mom_mtld, color = "mtld"), se = FALSE) +
  theme_classic() +
  labs(
    title = "Mother's Growth Curve by MTLD & vocd-D",
    subtitle = "LDP: 14 ~ 58 Months",
    y = "lexical diversity (raw score)"
  )

# Compare by Variance ----
# variance of children's intercept
# Per-measure mean, sd and relative spread of the *_intercept columns,
# followed by one histogram panel per measure. ldp_intercept and
# childes_intercept are created outside this section.
# `cv` is the coefficient of variation (sd / mean); it is a unit-free
# measure of spread, not the statistical variance, hence the column name.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      cdi_intercept, ppvt_intercept, mtld_intercept,
      mattr_intercept, vocd_intercept, ttr_intercept, sen_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 7 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_intercept 497.5050370 152.25178818 0.30603065
## 2 mattr_intercept 0.4157072 0.04125706 0.09924548
## 3 mtld_intercept 12.6331234 2.43570136 0.19280278
## 4 ppvt_intercept 27.4537313 10.91072920 0.39742245
## 5 sen_intercept 19.2577319 11.35482955 0.58962445
## 6 ttr_intercept 0.1934215 0.02758366 0.14260906
## 7 vocd_intercept 29.1426028 1.78626631 0.06129399
childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_intercept 0.4720901 0.03828670 0.08110042
## 2 mtld_intercept 16.4089111 4.79871125 0.29244544
## 3 ttr_intercept 0.1760118 0.05015386 0.28494603
## 4 vocd_intercept 32.3733921 1.08000127 0.03336077

# Histograms of the same columns; free_x gives every measure its own
# x-axis range because the measures are on different scales.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      cdi_intercept, ppvt_intercept, mtld_intercept,
      mattr_intercept, vocd_intercept, ttr_intercept, sen_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )

# variance of children's slope
# Per-measure mean, sd and relative spread of the *_slope columns,
# followed by one histogram panel per measure. ldp_intercept and
# childes_intercept are created outside this section.
# `cv` is the coefficient of variation (sd / mean); it is a unit-free
# measure of spread, not the statistical variance, hence the column name.
# NOTE: cv takes the sign of the mean, so it is negative for ttr_slope in
# the recorded output below, and it grows large as the mean nears zero.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
      vocd_slope, ttr_slope, sen_slope
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 7 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 cdi_slope 836.3045407 131.54764297 0.1572963
## 2 mattr_slope 0.2279045 0.05044707 0.2213518
## 3 mtld_slope 18.0175755 2.87794470 0.1597299
## 4 ppvt_slope 77.4935435 18.14881161 0.2341977
## 5 sen_slope 39.0414878 19.12625515 0.4898957
## 6 ttr_slope -0.0282476 0.04617205 -1.6345475
## 7 vocd_slope 10.4458891 2.97448143 0.2847514
childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mtld_slope, mattr_slope, vocd_slope, ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mattr_slope 0.17327564 0.08045816 0.4643362
## 2 mtld_slope 20.23156745 5.60486503 0.2770356
## 3 ttr_slope -0.02977205 0.09329748 -3.1337271
## 4 vocd_slope 4.61083231 5.07427255 1.1005112

# Histograms of the same columns; free_x gives every measure its own
# x-axis range because the measures are on different scales.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
      vocd_slope, ttr_slope, sen_slope
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mtld_slope, mattr_slope, vocd_slope, ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Children's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )

# variance of mother's intercept
# Per-measure mean, sd and relative spread of the mom_*_intercept columns,
# followed by one histogram panel per measure. ldp_intercept and
# childes_intercept are created outside this section.
# `cv` is the coefficient of variation (sd / mean); it is a unit-free
# measure of spread, not the statistical variance, hence the column name.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      mom_mtld_intercept, mom_mattr_intercept,
      mom_vocd_intercept, mom_ttr_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.5634104 0.02346517 0.04164845
## 2 mom_mtld_intercept 31.4531261 5.18189710 0.16474983
## 3 mom_ttr_intercept 0.1591607 0.04063250 0.25529233
## 4 mom_vocd_intercept 34.2590134 0.56077619 0.01636872
childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      mom_mtld_intercept, mom_mattr_intercept,
      mom_vocd_intercept, mom_ttr_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_intercept 0.5759650 0.02273538 0.03947355
## 2 mom_mtld_intercept 35.1003863 4.86577175 0.13862445
## 3 mom_ttr_intercept 0.2242398 0.08056756 0.35929202
## 4 mom_vocd_intercept 33.3979945 0.77503456 0.02320602

# Histograms of the same columns; free_x gives every measure its own
# x-axis range because the measures are on different scales.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      mom_mtld_intercept, mom_mattr_intercept,
      mom_vocd_intercept, mom_ttr_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(
      mom_mtld_intercept, mom_mattr_intercept,
      mom_vocd_intercept, mom_ttr_intercept
    ),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Intercept",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )

# variance of mother's slope
# Per-measure mean, sd and relative spread of the mom_*_slope columns,
# followed by one histogram panel per measure. ldp_intercept and
# childes_intercept are created outside this section.
# `cv` is the coefficient of variation (sd / mean); it is a unit-free
# measure of spread, not the statistical variance, hence the column name.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.06334250 0.01283598 0.2026441
## 2 mom_mtld_slope 14.85263380 2.76469332 0.1861416
## 3 mom_ttr_slope 0.04593157 0.05091364 1.1084672
## 4 mom_vocd_slope 0.59907406 0.31652406 0.5283555
childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  group_by(measure) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    sd = sd(value, na.rm = TRUE),
    cv = sd / mean
  )
## # A tibble: 4 x 4
## measure mean sd cv
## <chr> <dbl> <dbl> <dbl>
## 1 mom_mattr_slope 0.06253947 0.02896008 0.4630688
## 2 mom_mtld_slope 14.32656189 3.49666646 0.2440688
## 3 mom_ttr_slope 0.07174337 0.04398974 0.6131541
## 4 mom_vocd_slope 1.09681359 1.05017068 0.9574742

# Histograms of the same columns; free_x gives every measure its own
# x-axis range because the measures are on different scales.
ldp_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "LDP: 14 ~ 58 Months"
  )

childes_intercept %>%
  ungroup() %>%
  pivot_longer(
    cols = c(mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope),
    names_to = "measure",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value)) +
  facet_grid(~ measure, scales = "free_x") +
  geom_histogram() +
  theme_classic() +
  labs(
    title = "Variance of Mother's Slope",
    subtitle = "CHILDES: 14 ~ 58 Months"
  )

# Compare by Correlation between Parameters ----
# correlation plot of mother's intercept
# Pairwise correlations among the four mom_*_intercept columns, printed as
# numbers in the upper triangle. complete.cases(.) runs before select(),
# so a row with an NA in any column of the table is dropped, not only
# rows with an NA in the columns correlated here.

# LDP
ungroup(ldp_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept,
    mom_mattr_intercept,
    mom_vocd_intercept,
    mom_ttr_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# CHILDES
ungroup(childes_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept,
    mom_mattr_intercept,
    mom_vocd_intercept,
    mom_ttr_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# correlation plot of child's intercept
# Pairwise correlations among the child *_intercept columns, printed as
# numbers in the upper triangle. complete.cases(.) runs before select(),
# so a row with an NA in any column of the table is dropped, not only
# rows with an NA in the columns correlated here.

# LDP
ungroup(ldp_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept,
    ppvt_intercept,
    mtld_intercept,
    mattr_intercept,
    vocd_intercept,
    ttr_intercept,
    sen_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# CHILDES
ungroup(childes_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept,
    mattr_intercept,
    vocd_intercept,
    ttr_intercept
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# correlation plot of mother's slope
# Pairwise correlations among the four mom_*_slope columns, printed as
# numbers in the upper triangle. complete.cases(.) runs before select(),
# so a row with an NA in any column of the table is dropped, not only
# rows with an NA in the columns correlated here.

# LDP
ungroup(ldp_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope,
    mom_mattr_slope,
    mom_vocd_slope,
    mom_ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# CHILDES
ungroup(childes_intercept) %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_slope,
    mom_mattr_slope,
    mom_vocd_slope,
    mom_ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# correlation plot of child's slope
# Pairwise correlations among the child *_slope columns, printed as
# numbers in the upper triangle.
# ungroup() is called first, as the other correlation pipelines on these
# tables do: on a grouped tibble select() keeps the grouping columns, and
# they would then be passed on to cor().
# complete.cases(.) runs before select(), so a row with an NA in any
# column of the table is dropped.

# LDP
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(mtld_slope, mattr_slope, vocd_slope, ttr_slope) %>%
  cor() %>%
  corrplot::corrplot(method = "number", type = "upper")

# plot all parameters of children
# Correlations among every child intercept and slope column, drawn as
# squares in the upper triangle.
# ungroup() is called first, as the other correlation pipelines on these
# tables do: on a grouped tibble select() keeps the grouping columns, and
# they would then be passed on to cor().
# complete.cases(.) runs before select(), so a row with an NA in any
# column of the table is dropped.

# LDP
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    cdi_intercept, ppvt_intercept, mtld_intercept, mattr_intercept,
    vocd_intercept, ttr_intercept, sen_intercept,
    cdi_slope, ppvt_slope, mtld_slope, mattr_slope,
    vocd_slope, ttr_slope, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")

# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")

# plot all parameters of mothers
# Correlations among every mom_* intercept and slope column, drawn as
# squares in the upper triangle.
# ungroup() is called first, as the other correlation pipelines on these
# tables do: on a grouped tibble select() keeps the grouping columns, and
# they would then be passed on to cor().
# complete.cases(.) runs before select(), so a row with an NA in any
# column of the table is dropped.

# LDP
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")

# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")

# plot parameters of child and mother
# Correlations between mother and child parameters in one matrix, drawn
# as squares in the upper triangle.
# ungroup() is called first, as the other correlation pipelines on these
# tables do: on a grouped tibble select() keeps the grouping columns, and
# they would then be passed on to cor().
# complete.cases(.) runs before select(), so a row with an NA in any
# column of the table is dropped.

# LDP
# NOTE(review): the child columns here include mtld_intercept but not
# mattr_intercept, vocd_intercept or ttr_intercept, although the CHILDES
# call below selects all four -- confirm whether the omission is intended.
ldp_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope,
    mtld_intercept, mtld_slope, mattr_slope, vocd_slope, ttr_slope,
    sen_intercept, sen_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")

# CHILDES
childes_intercept %>%
  ungroup() %>%
  filter(complete.cases(.)) %>%
  select(
    mom_mtld_intercept, mom_mattr_intercept,
    mom_vocd_intercept, mom_ttr_intercept,
    mom_mtld_slope, mom_mattr_slope, mom_vocd_slope, mom_ttr_slope,
    mtld_intercept, mattr_intercept, vocd_intercept, ttr_intercept,
    mtld_slope, mattr_slope, vocd_slope, ttr_slope
  ) %>%
  cor() %>%
  corrplot::corrplot(method = "square", type = "upper")
